# -*- coding: utf-8 -*-
import json
import random

from scrapy import Request

from abchina.rules import *
from chinagd.spiders.base import BaseSpider


class SkuFrontSpider(BaseSpider):
    """Front-end SKU spider for the e.abchina.com general mall.

    Crawl order:
      1. ``start_requests`` asks for the root catalog list.
      2. ``parse_root_catalog`` emits every root catalog and requests its
         sub catalogs.
      3. ``parse_sub_catalog`` emits every sub catalog and, for catalogs
         whose ``level`` is 3, requests page 1 of the SKU list.
      4. ``parse_sku`` emits one ``Box`` per non-empty page and keeps
         requesting the following page until a page comes back empty.

    NOTE(review): ``self.batch_no`` and ``self.error_back`` are not defined
    in this class; they are presumably provided by ``BaseSpider``. The
    module-level ``parse_root_catalog`` / ``parse_sub_catalog`` /
    ``parse_sku_front`` functions and ``Box`` presumably come from the star
    import of ``abchina.rules`` -- confirm against those modules.
    """

    name = 'sku_front'
    # Frequently used endpoints
    index_url = 'https://e.abchina.com/qyjc/group/general_mall/index.html#/mall-home'
    root_catalog_url = 'https://e.abchina.com/qyjc/site/GMallCatg/QueryPageList'
    sub_catalog_url = 'https://e.abchina.com/qyjc/site/GMallCatg/GetCatgByFirst'
    sku_list_url = 'https://e.abchina.com/qyjc/site/ProductList/QueryProductPageList'

    # Header values shared by every request this spider sends.
    # (Deliberately not named ``user_agent``: Scrapy gives that spider
    # attribute a meaning of its own.)
    _UA_STRING = (
        'Mozilla/5.0 (Windows NT 10.0; Win64; x64) '
        'AppleWebKit/537.36 (KHTML, like Gecko) '
        'Chrome/83.0.4103.97 Safari/537.36'
    )
    _ENCRYPT_TYPE = 'encrypt'

    # Interface names sent together with the request parameters.
    _ROOT_CATALOG_INTERFACE = 'GMallCatg/QueryPageList'
    _SUB_CATALOG_INTERFACE = 'GMallCatg/GetCatgByFirst'
    _SKU_LIST_INTERFACE = 'ProductList/QueryProductPageList'

    def __init__(self, batchNo=None, *args, **kwargs):
        """Forward ``batchNo`` to the base spider and set the SKU page size."""
        super(SkuFrontSpider, self).__init__(batchNo=batchNo, *args, **kwargs)
        # Number of SKUs requested per list page.
        self.page_size = 100

    # ------------------------------------------------------------------
    # Request builders
    # ------------------------------------------------------------------
    def _build_headers(self, with_json_body=False):
        """Return a fresh header dict.

        :param with_json_body: add ``Content-Type: application/json``
            (used by the POST requests that carry a JSON body).
        """
        headers = {
            'User-Agent': self._UA_STRING,
            'X-EncryptType': self._ENCRYPT_TYPE,
        }
        if with_json_body:
            headers['Content-Type'] = 'application/json'
        return headers

    def _catalog_request(self, url, callback, params, batch_no, **extra_meta):
        """Build a GET catalog request.

        The JSON-encoded ``params`` travel in ``meta['params']`` rather than
        in the URL; presumably a downloader middleware turns them into the
        actual query -- verify against the project middlewares.

        :param url: catalog endpoint.
        :param callback: spider method that handles the response.
        :param params: dict of query parameters (JSON-encoded here).
        :param batch_no: batch number stored under ``meta['batchNo']``.
        :param extra_meta: additional ``meta`` entries (e.g. ``parentId``).
        """
        meta = {
            'reqType': 'catalog',
            'batchNo': batch_no,
            'params': json.dumps(params),
        }
        meta.update(extra_meta)
        return Request(
            method='GET',
            url=url,
            callback=callback,
            headers=self._build_headers(),
            meta=meta,
            errback=self.error_back,
            priority=100,
            dont_filter=True,
        )

    def _sku_list_request(self, catalog_id, page, page_size, batch_no):
        """Build the POST request for one page of a catalog's SKU list.

        Used for page 1 as well as for every following page, so all pages
        send the same body shape (including ``InterfaceName``).
        """
        payload = {
            "SourceId": 0,
            "ProductQuery": "",
            "OrderType": "",
            "CurPage": page,
            "PageSize": page_size,
            "BrandQuery": [],
            "CatgId": catalog_id,
            "InterfaceName": self._SKU_LIST_INTERFACE,
        }
        return Request(
            method='POST',
            url=self.sku_list_url,
            callback=self.parse_sku,
            headers=self._build_headers(with_json_body=True),
            body=json.dumps(payload),
            meta={
                'reqType': 'sku',
                'batchNo': batch_no,
                'page': page,
                'pageSize': page_size,
                'catalogId': catalog_id,
            },
            errback=self.error_back,
            priority=50,
            dont_filter=True,
        )

    # ------------------------------------------------------------------
    # Crawl steps
    # ------------------------------------------------------------------
    def start_requests(self):
        """Yield the single request that fetches the root catalog list."""
        yield self._catalog_request(
            self.root_catalog_url,
            self.parse_root_catalog,
            {
                "CurPage": 1,
                "PageSize": 99,
                "catgId": 0,
                "hasSubject": 'false',
                "InterfaceName": self._ROOT_CATALOG_INTERFACE,
            },
            self.batch_no,
        )

    def parse_root_catalog(self, response):
        """Emit every root catalog and request its sub catalogs."""
        batch_no = response.meta.get('batchNo', self.batch_no)
        # Inside a method the bare name resolves to the module-level parser
        # function, not to this method.
        cat_list = parse_root_catalog(response)
        if not cat_list:
            # Covers both an empty list and a parser that returned None
            # (shuffling None would raise TypeError).
            return
        random.shuffle(cat_list)
        self.logger.info('主分类->(%s)', len(cat_list))
        for cat in cat_list:
            cat['batchNo'] = batch_no
            yield cat

            # Crawl the sub catalogs of this root catalog.
            catalog_id = cat.get('catalogId')
            yield self._catalog_request(
                self.sub_catalog_url,
                self.parse_sub_catalog,
                {"id": catalog_id, "InterfaceName": self._SUB_CATALOG_INTERFACE},
                batch_no,
                parentId=catalog_id,
            )

    def parse_sub_catalog(self, response):
        """Emit every sub catalog; start SKU paging for level-3 catalogs."""
        meta = response.meta
        batch_no = meta.get('batchNo', self.batch_no)
        cat_list = parse_sub_catalog(response)
        if not cat_list:
            # Nothing to emit; also guards against a None parser result.
            return
        random.shuffle(cat_list)
        self.logger.info('子分类[%s]->(%s)', meta.get('parentId'), len(cat_list))
        for cat in cat_list:
            cat['batchNo'] = batch_no
            yield cat

            # Only level-3 catalogs get a SKU list request.
            if cat.get('level') == 3:
                yield self._sku_list_request(
                    cat.get('catalogId'), 1, self.page_size, batch_no
                )

    # Handle one page of the SKU list
    def parse_sku(self, response):
        """Emit the SKUs of the current page and request the next page.

        Paging stops at the first page for which the parser returns nothing.
        """
        meta = response.meta
        batch_no = meta.get('batchNo', self.batch_no)
        # Fall back to sane values so a missing meta entry cannot break
        # the ``+ 1`` below or send a null page size.
        cur_page = meta.get('page') or 1
        page_size = meta.get('pageSize') or self.page_size
        catalog_id = meta.get('catalogId')

        sku_list = parse_sku_front(response)
        if not sku_list:
            self.logger.info('分页完成[%s]: page[%s]', catalog_id, cur_page)
            return

        # Emit the current page.
        self.logger.info(
            '清单: cat=%s, page=%s, count=%s', catalog_id, cur_page, len(sku_list)
        )
        yield Box('sku', batch_no, sku_list)

        # Crawl the next page.
        yield self._sku_list_request(catalog_id, cur_page + 1, page_size, batch_no)
